library(BSgenome.Hsapiens.UCSC.hg38)
library(plyranges)
library(tidyverse)
library(magrittr)
library(cowplot)
WIN_SIZE = 1000
RADIUS = 300
EPSILON = 30
TPM = 5
N_CTS_LOW = 10
N_CTS_HIGH = 80
COLOR_VALS_CPA=c(TGTA="#73ADD6", AWTAAA="#358C44", TKTKTK="#F36B4D")
COLOR_VALS_CPA_PA=c(TGTA="#73ADD6", AWTAAA="#358C44", TKTKTK="#F36B4D", AAAAAA="#888888")
COLOR_VALS_EXTRA=c(TGTA="#73ADD6", AWTAAA="#358C44", TKTKTK="#F36B4D", CAATTA="orchid4")
FILE_UTROME = sprintf("data/granges/utrome_gr_txs.e%d.t%d.gc39.pas3.f0.9999.w500.Rds", EPSILON, TPM)
FILE_BED = sprintf("data/bed/celltypes/celltypes.e%d.t%d.bed.gz", EPSILON, TPM)
FILE_BED_UNLIKELY = sprintf("data/bed/cleavage-sites/utrome.unlikely.e%d.t%d.gc39.pas3.f0.9999.bed.gz",
EPSILON, TPM)
get_centered_motif_sites <- function (motif, seqs) {
seqs %>%
vmatchPattern(pattern=motif, fixed=FALSE) %>%
unlist %>% as.data.frame %>%
mutate(motif=motif,
start=start - WIN_SIZE/2,
end=end - WIN_SIZE/2,
center=(end + start)/2) %>%
select(motif, center, start, end)
}
## plot density from motif positions data.frame
plot_motif_density <- function (df_motifs, RADIUS=300, title="UTRome",
col_values=COLOR_VALS_CPA) {
df_motifs %>%
ggplot(aes(x=center, color=motif)) +
stat_density(aes(y=..scaled..), geom='line', position='identity',
size=1.5, alpha=0.9) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0, linetype='dashed', color='black') +
coord_cartesian(xlim=c(-RADIUS, RADIUS)) +
scale_x_continuous(breaks=seq(-RADIUS, RADIUS, 100),
limits=c(-RADIUS - 100, RADIUS+100)) +
scale_color_manual(values=col_values) +
labs(x=sprintf("Distance from Cleavage Site (%s)", title),
y="Relative Density", color="Motif") +
guides(color=guide_legend(override.aes=list(alpha=1, size=3))) +
theme_minimal_vgrid()
}
## plot count from motif positions data.frame
plot_motif_count <- function (df_motifs, RADIUS=300, title="UTRome",
col_values=COLOR_VALS_CPA) {
df_motifs %>%
ggplot(aes(x=center, color=motif)) +
stat_density(aes(y=..count..), geom='line', position='identity',
size=1.5, alpha=0.9) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0, linetype='dashed', color='black') +
coord_cartesian(xlim=c(-RADIUS, RADIUS)) +
scale_x_continuous(breaks=seq(-RADIUS, RADIUS, 100),
limits=c(-RADIUS - 100, RADIUS+100)) +
scale_color_manual(values=col_values) +
labs(x=sprintf("Distance from Cleavage Site (%s)", title),
y="Count", color="Motif") +
guides(color=guide_legend(override.aes=list(alpha=1, size=3))) +
theme_minimal_vgrid()
}
## plot unscaled density from motif positions data.frame
plot_motif_density_ns <- function (df_motifs, RADIUS=300, title="UTRome",
col_values=COLOR_VALS_CPA) {
df_motifs %>%
ggplot(aes(x=center, color=motif)) +
stat_density(geom='line', size=1.5, alpha=0.9, position="identity") +
geom_hline(yintercept=0) +
geom_vline(xintercept=0, linetype='dashed', color='black') +
coord_cartesian(xlim=c(-RADIUS, RADIUS)) +
scale_x_continuous(breaks=seq(-RADIUS, RADIUS, 100),
limits=c(-RADIUS - 100, RADIUS+100)) +
scale_color_manual(values=col_values) +
labs(x=sprintf("Distance from Cleavage Site (%s)", title),
y="Density", color="Motif") +
guides(color=guide_legend(override.aes=list(alpha=1, size=3))) +
theme_minimal_vgrid()
}
gr_sites <- read_bed(FILE_BED) %>%
`seqlevelsStyle<-`("UCSC") %>%
keepStandardChromosomes(pruning.mode="coarse") %>%
anchor_center() %>%
mutate(width=EPSILON)
## Load all transcripts
gr_txs <- readRDS(FILE_UTROME)
## focus on cleavage sites
gr_cleavage <- gr_txs %>%
anchor_3p %>%
mutate(n_celltypes=count_overlaps_directed(mutate(., width=0), gr_sites)) %>%
mutate(width=WIN_SIZE) %>%
shift_downstream(WIN_SIZE/2) %>%
mutate(origin=ifelse(is_novel, "UTRome", "GENCODE"),
origin_ud=case_when(
!is_novel ~ "GENCODE",
str_detect(transcript_id, "UTR-") ~ "upstream",
str_detect(transcript_id, "UTR+") ~ "downstream"
))
gr_ip <- read_bed(FILE_BED_UNLIKELY) %>%
anchor_3p %>%
mutate(width=WIN_SIZE) %>%
shift_downstream(WIN_SIZE/2)
gr_single <- filter(gr_cleavage, utr_type == "single")
gr_ipa <- filter(gr_cleavage, is_ipa)
gr_multi_tandem <- filter(gr_cleavage, utr_type == 'multi', !is_ipa)
gr_gencode <- filter(gr_cleavage, !is_novel)
gr_novel <- filter(gr_cleavage, is_novel)
gr_proximal_gc <- filter(gr_cleavage, utr_type == 'multi', is_proximal, !is_novel)
gr_nondistal_gc <- filter(gr_cleavage, utr_type == 'multi', !is_distal, !is_novel)
gr_distal_gc <- filter(gr_cleavage, utr_type == 'multi', is_distal, !is_novel)
gr_proximal_novel <- filter(gr_cleavage, utr_type == 'multi', is_proximal, is_novel)
gr_nondistal_novel <- filter(gr_cleavage, utr_type == 'multi', !is_distal, is_novel)
gr_distal_novel <- filter(gr_cleavage, utr_type == 'multi', is_distal, is_novel)
## only GENCODE
gr_ctnone_gc <- filter(gr_cleavage, !is_novel, n_celltypes == 0)
gr_common <- filter(gr_cleavage, !is_novel, n_celltypes > 0)
gr_ctlow_gc <- filter(gr_cleavage, !is_novel, n_celltypes > 0, n_celltypes < N_CTS_LOW)
gr_ctmid_gc <- filter(gr_cleavage, !is_novel, n_celltypes >= N_CTS_LOW, n_celltypes < N_CTS_HIGH)
gr_cthigh_gc <- filter(gr_cleavage, !is_novel, n_celltypes >= N_CTS_HIGH)
gr_ctlow_novel <- filter(gr_cleavage, is_novel, n_celltypes < N_CTS_LOW)
gr_ctmid_novel <- filter(gr_cleavage, is_novel, n_celltypes >= N_CTS_LOW, n_celltypes < N_CTS_HIGH)
gr_cthigh_novel <- filter(gr_cleavage, is_novel, n_celltypes >= N_CTS_HIGH)
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_cleavage)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="All")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_single)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Single UTR Genes")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ipa)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Intronic Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_multi_tandem)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Tandem Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_gencode)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="GENCODE Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_proximal_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="GENCODE Proximal Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_nondistal_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="GENCODE Non-Distal Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_distal_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="GENCODE Distal Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_cthigh_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="GENCODE - Cell Types High")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ctmid_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="GENCODE - Cell Types Midrange")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ctlow_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="GENCODE - Cell Types Low")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ctnone_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="GENCODE - No Supporting Cell Types")
ggsave("img/sq/sup1c-motif-density-gencode-hcl.pdf",
width=8, height=6, dpi=300)
## Warning: Removed 68753 rows containing non-finite values (`stat_density()`).
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ctnone_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density_ns(title="GENCODE - No Supporting Cell Types")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ctnone_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_count(title="GENCODE - No Supporting Cell Types")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_common)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="GENCODE + HCL")
ggsave("img/sq/sup1c-motif-density-common-hcl.pdf",
width=8, height=6, dpi=300)
## Warning: Removed 85062 rows containing non-finite values (`stat_density()`).
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_common)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density_ns(title="GENCODE + HCL")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_common)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_count(title="GENCODE + HCL")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_novel)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Novel Sites")
ggsave("img/sq/sup1c-motif-density-utrome-hcl.pdf",
width=8, height=6, dpi=300)
## Warning: Removed 77133 rows containing non-finite values (`stat_density()`).
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_novel)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density_ns(title="Novel Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_novel)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_count(title="Novel Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_proximal_novel)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Novel Proximal Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_nondistal_novel)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Novel Non-Distal Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_distal_novel)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Novel Distal Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_cthigh_novel)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Novel - Cell Types High")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ctmid_novel)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Novel - Cell Types Midrange")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ctlow_novel)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Novel - Cell Types Low")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ip)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
rbind(df_tgta, df_pas, df_tktktk) %>%
plot_motif_density(title="Internal Priming Sites")
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_ip)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
df_aaaaaa <- get_centered_motif_sites(motif="AAAAAA", seqs)
rbind(df_tgta, df_pas, df_tktktk, df_aaaaaa) %>%
plot_motif_density(title="Internal Priming Sites", col_values=COLOR_VALS_CPA_PA)
grs_cleavage <- gr_cleavage %>% split(.$origin)
seqs_cleavage <- lapply(grs_cleavage, . %>% getSeq(x=BSgenome.Hsapiens.UCSC.hg38))
lapply_get_centered_motif_sites <- function(motif, seqs=seqs_cleavage) {
lapply(seqs, . %>% get_centered_motif_sites(motif=motif)) %>%
do.call(what=rbind) %>%
mutate(origin=str_extract(rownames(.), "^[^.]+")) %>%
`rownames<-`(NULL)
}
df_tgta <- lapply_get_centered_motif_sites(motif="TGTA") %>%
mutate(origin=factor(origin, levels=c("GENCODE", "UTRome")))
df_pas <- lapply_get_centered_motif_sites(motif="AWTAAA") %>%
mutate(origin=factor(origin, levels=c("GENCODE", "UTRome")))
df_tktktk <- lapply_get_centered_motif_sites(motif="TKTKTK") %>%
mutate(origin=factor(origin, levels=c("GENCODE", "UTRome")))
df_combined <- rbind(df_tgta, df_pas, df_tktktk)
df_combined %>%
ggplot(aes(x=center, color=motif, linetype=origin)) +
stat_density(aes(y=..scaled..), geom='line', position='identity',
size=1.0, alpha=0.9) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0, linetype='dashed', color='black') +
coord_cartesian(xlim=c(-RADIUS, RADIUS)) +
scale_x_continuous(breaks=seq(-RADIUS, RADIUS, 100),
limits=c(-RADIUS - 100, RADIUS+100)) +
scale_color_manual(values=c(TGTA="#73ADD6", AWTAAA="#358C44", TKTKTK="#F36B4D")) +
scale_linetype_manual(values=c("GENCODE"="solid", "UTRome"="dashed")) +
labs(x="Distance from RefSeq Cleavage Site",
y="Relative Density", color="Motif", linetype="Annotation") +
guides(color=guide_legend(override.aes=list(alpha=1, size=3))) +
theme_minimal_vgrid()
grs_cleavage_ud <- gr_cleavage %>% split(.$origin_ud)
seqs_cleavage_ud <- lapply(grs_cleavage_ud, . %>% getSeq(x=BSgenome.Hsapiens.UCSC.hg38))
lapply_get_centered_motif_sites <- function(motif, seqs=seqs_cleavage_ud) {
lapply(seqs, . %>% get_centered_motif_sites(motif=motif)) %>%
do.call(what=rbind) %>%
mutate(origin=str_extract(rownames(.), "^[^.]+")) %>%
`rownames<-`(NULL)
}
df_tgta_ud <- lapply_get_centered_motif_sites(motif="TGTA") %>%
mutate(origin=factor(origin, levels=c("GENCODE", "upstream", "downstream")))
df_pas_ud <- lapply_get_centered_motif_sites(motif="AWTAAA") %>%
mutate(origin=factor(origin, levels=c("GENCODE", "upstream", "downstream")))
df_tktktk_ud <- lapply_get_centered_motif_sites(motif="TKTKTK") %>%
mutate(origin=factor(origin, levels=c("GENCODE", "upstream", "downstream")))
df_combined_ud <- rbind(df_tgta_ud, df_pas_ud, df_tktktk_ud)
df_combined_ud %>%
ggplot(aes(x=center, color=motif, linetype=origin)) +
stat_density(aes(y=..scaled..), geom='line', position='identity',
size=1.0, alpha=0.9) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0, linetype='dashed', color='black') +
coord_cartesian(xlim=c(-RADIUS, RADIUS)) +
scale_x_continuous(breaks=seq(-RADIUS, RADIUS, 100),
limits=c(-RADIUS - 100, RADIUS+100)) +
scale_color_manual(values=c(TGTA="#73ADD6", AWTAAA="#358C44", TKTKTK="#F36B4D")) +
scale_linetype_manual(values=c("GENCODE"="solid", "upstream"="dotted", "downstream"="dashed")) +
labs(x="Distance from RefSeq Cleavage Site",
y="Relative Density", color="Motif", linetype="Annotation") +
guides(color=guide_legend(override.aes=list(alpha=1, size=3))) +
theme_minimal_vgrid()
df_tgta_ud %>%
ggplot(aes(x=center, color=motif, linetype=origin)) +
stat_density(aes(y=..scaled..), geom='line', position='identity',
size=1.0, alpha=0.9) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0, linetype='dashed', color='black') +
coord_cartesian(xlim=c(-RADIUS, RADIUS)) +
scale_x_continuous(breaks=seq(-RADIUS, RADIUS, 100),
limits=c(-RADIUS - 100, RADIUS+100)) +
scale_color_manual(values=c(TGTA="#73ADD6", AWTAAA="#358C44", TKTKTK="#F36B4D")) +
scale_linetype_manual(values=c("GENCODE"="solid", "upstream"="dotted", "downstream"="dashed")) +
labs(x="Distance from RefSeq Cleavage Site",
y="Relative Density", color="Motif", linetype="Annotation") +
guides(color=FALSE) +
theme_minimal_vgrid()
df_pas_ud %>%
ggplot(aes(x=center, color=motif, linetype=origin)) +
stat_density(aes(y=..scaled..), geom='line', position='identity',
size=1.0, alpha=0.9) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0, linetype='dashed', color='black') +
coord_cartesian(xlim=c(-RADIUS, RADIUS)) +
scale_x_continuous(breaks=seq(-RADIUS, RADIUS, 100),
limits=c(-RADIUS - 100, RADIUS+100)) +
scale_color_manual(values=c(TGTA="#73ADD6", AWTAAA="#358C44", TKTKTK="#F36B4D")) +
scale_linetype_manual(values=c("GENCODE"="solid", "upstream"="dotted", "downstream"="dashed")) +
labs(x="Distance from RefSeq Cleavage Site",
y="Relative Density", color="Motif", linetype="Annotation") +
guides(color=FALSE) +
theme_minimal_vgrid()
df_tktktk_ud %>%
ggplot(aes(x=center, color=motif, linetype=origin)) +
stat_density(aes(y=..scaled..), geom='line', position='identity',
size=1.0, alpha=0.9) +
geom_hline(yintercept=0) +
geom_vline(xintercept=0, linetype='dashed', color='black') +
coord_cartesian(xlim=c(-RADIUS, RADIUS)) +
scale_x_continuous(breaks=seq(-RADIUS, RADIUS, 100),
limits=c(-RADIUS - 100, RADIUS+100)) +
scale_color_manual(values=c(TGTA="#73ADD6", AWTAAA="#358C44", TKTKTK="#F36B4D")) +
scale_linetype_manual(values=c("GENCODE"="solid", "upstream"="dotted", "downstream"="dashed")) +
labs(x="Distance from RefSeq Cleavage Site",
y="Relative Density", color="Motif", linetype="Annotation") +
guides(color=FALSE) +
theme_minimal_vgrid()
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_cleavage)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
df_caatta <- get_centered_motif_sites(motif="CAATTA", seqs)
rbind(df_tgta, df_pas, df_tktktk, df_caatta) %>%
plot_motif_density(title="All", col_values=COLOR_VALS_EXTRA)
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_single)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
df_caatta <- get_centered_motif_sites(motif="CAATTA", seqs)
rbind(df_tgta, df_pas, df_tktktk, df_caatta) %>%
plot_motif_density(title="Single-UTR Genes", col_values=COLOR_VALS_EXTRA)
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_proximal_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
df_caatta <- get_centered_motif_sites(motif="CAATTA", seqs)
rbind(df_tgta, df_pas, df_tktktk, df_caatta) %>%
plot_motif_density(title="GENCODE Proximal Sites", col_values=COLOR_VALS_EXTRA)
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_nondistal_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
df_caatta <- get_centered_motif_sites(motif="CAATTA", seqs)
rbind(df_tgta, df_pas, df_tktktk, df_caatta) %>%
plot_motif_density(title="GENCODE Non-Distal Sites", col_values=COLOR_VALS_EXTRA)
seqs <- getSeq(BSgenome.Hsapiens.UCSC.hg38, gr_distal_gc)
df_tgta <- get_centered_motif_sites("TGTA", seqs)
df_pas <- get_centered_motif_sites(motif="AWTAAA", seqs)
df_tktktk <- get_centered_motif_sites(motif="TKTKTK", seqs)
df_caatta <- get_centered_motif_sites(motif="CAATTA", seqs)
rbind(df_tgta, df_pas, df_tktktk, df_caatta) %>%
plot_motif_density(title="GENCODE Distal Sites", col_values=COLOR_VALS_EXTRA)
## R version 4.2.2 (2022-10-31)
## Platform: x86_64-apple-darwin13.4.0 (64-bit)
## Running under: macOS Big Sur ... 10.16
##
## Matrix products: default
## BLAS/LAPACK: /Users/mfansler/miniconda3/envs/bioc_3_16/lib/libopenblasp-r0.3.21.dylib
##
## locale:
## [1] en_US.UTF-8/en_US.UTF-8/en_US.UTF-8/C/en_US.UTF-8/en_US.UTF-8
##
## attached base packages:
## [1] stats4 stats graphics grDevices utils datasets methods
## [8] base
##
## other attached packages:
## [1] cowplot_1.1.1 magrittr_2.0.3
## [3] lubridate_1.9.2 forcats_1.0.0
## [5] stringr_1.5.0 dplyr_1.1.0
## [7] purrr_1.0.1 readr_2.1.4
## [9] tidyr_1.3.0 tibble_3.1.8
## [11] ggplot2_3.4.1 tidyverse_2.0.0
## [13] plyranges_1.18.0 BSgenome.Hsapiens.UCSC.hg38_1.4.5
## [15] BSgenome_1.66.3 rtracklayer_1.58.0
## [17] Biostrings_2.66.0 XVector_0.38.0
## [19] GenomicRanges_1.50.0 GenomeInfoDb_1.34.9
## [21] IRanges_2.32.0 S4Vectors_0.36.0
## [23] BiocGenerics_0.44.0
##
## loaded via a namespace (and not attached):
## [1] MatrixGenerics_1.10.0 Biobase_2.58.0
## [3] sass_0.4.2 jsonlite_1.8.4
## [5] bslib_0.4.1 highr_0.9
## [7] GenomeInfoDbData_1.2.9 Rsamtools_2.14.0
## [9] yaml_2.3.6 pillar_1.8.1
## [11] lattice_0.20-45 glue_1.6.2
## [13] digest_0.6.30 colorspace_2.0-3
## [15] htmltools_0.5.4 Matrix_1.5-3
## [17] XML_3.99-0.12 pkgconfig_2.0.3
## [19] zlibbioc_1.44.0 scales_1.2.1
## [21] tzdb_0.3.0 BiocParallel_1.32.0
## [23] timechange_0.2.0 farver_2.1.1
## [25] generics_0.1.3 ellipsis_0.3.2
## [27] cachem_1.0.6 withr_2.5.0
## [29] SummarizedExperiment_1.28.0 cli_3.6.0
## [31] crayon_1.5.2 evaluate_0.18
## [33] fansi_1.0.3 textshaping_0.3.6
## [35] tools_4.2.2 hms_1.1.2
## [37] BiocIO_1.8.0 lifecycle_1.0.3
## [39] matrixStats_0.62.0 munsell_0.5.0
## [41] DelayedArray_0.24.0 compiler_4.2.2
## [43] jquerylib_0.1.4 systemfonts_1.0.4
## [45] rlang_1.1.0 grid_4.2.2
## [47] RCurl_1.98-1.9 rstudioapi_0.14
## [49] rjson_0.2.21 labeling_0.4.2
## [51] bitops_1.0-7 rmarkdown_2.18
## [53] restfulr_0.0.15 gtable_0.3.1
## [55] codetools_0.2-18 R6_2.5.1
## [57] GenomicAlignments_1.34.0 knitr_1.40
## [59] fastmap_1.1.0 utf8_1.2.2
## [61] ragg_1.2.5 stringi_1.7.8
## [63] parallel_4.2.2 vctrs_0.6.1
## [65] tidyselect_1.2.0 xfun_0.34